measures_of_interest %>%
  ggplot(aes(x = date, y = count)) +
  geom_line(aes(group = measure, color = measure)) +
  geom_point(aes(group = seq_along(date), color = measure)) +
  transition_reveal(date) +
  scale_x_date(breaks = "4 months", labels = date_format("%b-%Y")) +
  scale_colour_discrete(name = "Membership Type",
                        breaks = c("ct_annual_members", "ct_single_day_passes",
                                   "ct_single_trip_passes"),
                        labels = c("Annual Passes Renewed or Purchased",
                                   "Single-Day Passes Purchased",
                                   "Single-Trip Passes Purchased")) +
  labs(x = "Date", y = "Count") +
  ggtitle("CitiBike Memberships Purchased by Month in NYC and New Jersey City") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

manhattan_rides_df <- read_csv("manhattan_rides.csv")

manhattan_rides_df <-
  manhattan_rides_df %>% 
  mutate(
    day_of_week = factor(day_of_week, ordered = T, 
                         levels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")), 
    year = factor(year), 
    age_group = factor(age_group, ordered = T,
                       levels = c("18-25","26-35", "36-45", "46-55", "56-65", "66-85")), 
    gender = type.convert(gender, as.is = F))

manhattan_rides_df %>% 
  group_by(age_group) %>% 
  summarize(min = min(age), max = max(age), obs = n())
## # A tibble: 6 x 4
##   age_group   min   max    obs
##   <ord>     <dbl> <dbl>  <int>
## 1 18-25        18    25  35068
## 2 26-35        26    35 102948
## 3 36-45        36    45  56694
## 4 46-55        46    55  43430
## 5 56-65        56    65  26232
## 6 66-85        66    85   6734
manhattan_rides_df %>% 
  group_by(year) %>% 
  mutate(
    month = month(starttime, label = T)
  ) %>% 
  filter(tripduration < 2500) %>% 
  plot_ly(
    x = ~month, 
    y = ~trip_min,
    color = ~year,
    type = "box") %>% 
  layout(
    boxmode = "group",
    title = "Duration of Citibike Rides by Month",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Trip Duration in Minutes")
    )

Looks like maybe the overall length of trips in 2019 was more consistent. 2020 had a bump in duration of rides, starting in April. Overall, trip length seems more variable in 2020.

manhattan_rides_df %>% 
  group_by(year) %>% 
  mutate(
    month = month(starttime, label = T)
  ) %>% 
  group_by(year, month) %>% 
  summarise(obs = n()) %>% 
  plot_ly(
    x = ~month, 
    y = ~obs, 
    color = ~year,
    type = "scatter",
    mode = "lines") %>%  
  layout(
    title = "Number of Citibike Rides per Month",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Rides")
  )

Huge drop in monthly trips in April 2020. Lockdown started mid/late March so this coincides with people transitioning to WFH and largely staying inside to minimize contacts. The ride numbers bounce back quite a bit after this but not to 2019 levels.

citi_pc_change = 
  manhattan_rides_df %>% 
  mutate(date = format(stoptime, format = "%m-%d-%Y")) %>% 
  group_by(stop_date, year) %>% 
  summarize(daily_rides = n()) %>% 
  ungroup() %>% 
  group_by(stop_date) %>% 
  arrange(year, .by_group = T) %>% 
  mutate(percent_change = (daily_rides/lag(daily_rides) - 1) * 100) %>%
  filter(year == 2020) %>% 
  select(date = stop_date, percent_change) %>% 
  mutate(transit_system = "citi_bike",
         date = paste0("2020-", date),
         date = as.Date(date, "%Y-%m-%d"))
## `summarise()` has grouped output by 'stop_date'. You can override using the `.groups` argument.
# Using ggplot
post_covid_transit = citi_pc_change %>% 
  ungroup() %>% 
  drop_na() %>% 
  ggplot(aes(x = date, y = percent_change, color = transit_system)) +
  geom_smooth(aes(color = transit_system))

ggplotly(post_covid_transit)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Ridership data for 2019/2020 Manhattan
turnstiles_2019_m = read_csv("2019-turnstile.csv") %>% 
  filter(borough == "M") %>% 
  mutate(gtfs_latitude = as.numeric(gtfs_latitude),
         gtfs_longitude = as.numeric(gtfs_longitude))
## Rows: 159384 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (9): stop_name, daytime_routes, division, line, borough, structure, gtf...
## dbl  (2): entries, exits
## date (1): date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
turnstiles_2020_m = read_csv("2020-turnstile.csv") %>% 
    filter(borough == "M") %>% 
    mutate(gtfs_latitude = as.numeric(gtfs_latitude),
         gtfs_longitude = as.numeric(gtfs_longitude))
## Rows: 159533 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (7): stop_name, daytime_routes, division, line, borough, structure, com...
## dbl  (4): gtfs_longitude, gtfs_latitude, entries, exits
## date (1): date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
turnstiles_19_20_m = rbind(turnstiles_2019_m, turnstiles_2020_m)
## Warning: One or more parsing issues, see `problems()` for details
#Ridership data 3/1/2020 - today
ridership_covid_changes = read_csv("covid-ridership.csv") %>% 
  janitor::clean_names() %>% 
  mutate(date = as.Date(date, "%m/%d/%Y")) %>% 
  rename(buses_ter = buses_total_estimated_ridership) %>% 
  rename(lirr_ter = lirr_total_estimated_ridership) %>% 
  rename(metro_north_ter = metro_north_total_estimated_ridership) %>% 
  rename(subways_ter = subways_total_estimated_ridership) %>% 
  rename(subways_pc = subways_percent_change_from_pre_pandemic_equivalent_day) %>% 
  rename(metro_north_pc = metro_north_percent_change_from_2019_monthly_weekday_saturday_sunday_average) %>% 
  rename(lirr_pc = lirr_percent_change_from_2019_monthly_weekday_saturday_sunday_average) %>% 
  rename(buses_pc = buses_percent_change_from_pre_pandemic_equivalent_day) %>% 
  rename(bridges_and_tunnels_pc = bridges_and_tunnels_percent_change_from_pre_pandemic_equivalent_day) %>% 
  rename(access_a_ride_ter = access_a_ride_total_scheduled_trips) %>% 
  rename(access_a_ride_pc = access_a_ride_percent_change_from_pre_pandemic_equivalent_day)
## Rows: 628 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (8): Date, Subways: % Change From Pre-Pandemic Equivalent Day, Buses: % ...
## dbl (5): Subways: Total Estimated Ridership, Buses: Total Estimated Ridershi...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
ridership_covid_changes_2020 = ridership_covid_changes %>% 
  filter(date <= as.Date('2020-12-31'))

ridership_covid_pc_tidy = 
  ridership_covid_changes_2020 %>% 
  
  select(date, access_a_ride_pc, bridges_and_tunnels_pc, buses_pc, lirr_pc, metro_north_pc, subways_pc) %>% 
  
  pivot_longer(
    c(access_a_ride_pc:subways_pc), 
    names_to = "transit_system",
    values_to = "percent_change"
  ) %>% 
  
   mutate(transit_system = gsub("_pc", "", transit_system),
          percent_change = gsub("%", "", percent_change),
          percent_change = as.numeric(percent_change))
ridership_pc_change = 
  bind_rows(ridership_covid_pc_tidy, citi_pc_change)
# Using ggplot
post_covid_transit = ridership_pc_change %>% 
  ungroup() %>% 
  drop_na() %>% 
  filter(date >= as.Date('2020-03-01')) %>% 
  ggplot(aes(x = date, y = percent_change, color = transit_system)) +
  geom_smooth(aes(color = transit_system))

ggplotly(post_covid_transit)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Using plotly 
ridership_pc_change %>% 
  ungroup() %>% 
  drop_na() %>% 
  filter(date >= as.Date('2020-03-01')) %>% 
  plot_ly(
    x = ~date, 
    y = ~percent_change, 
    color = ~transit_system,
    type = "scatter",
    mode = "lines") %>% 
  layout(
    title = "Ridership Transit System Percent Change Following COVID",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Percent Change")
  )